In [1]:
# Data downloaded from: http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TAVG_EqualArea.nc
!md5sum /tmp/Complete_TAVG_EqualArea.nc
In [2]:
%pylab inline
from netCDF4 import Dataset
from numpy import array, size, average
from scipy.stats import nanmean
def running_average(temp, n=5, m=6, unc=False):
"""
Calculates centered running average from -n to +m points (total of n+m+1 points).
Example 1: if n=m=6 and the data is monthly, then the result is a centered 13 month running average.
Example 2: if n=5, m=6 and the data is monthly, then the result is a "centered" 12 month moving average
(another way to center it is with n=6, m=5).
unc ... if True, calculate the average of uncertainties
"""
avg = temp.copy()
for i in range(n, size(temp)-m):
window = temp[i-n:i+m+1]
if unc:
avg[i] = sqrt(sum(window**2))/len(window)
else:
avg[i] = average(window)
avg[:n] = NaN
avg[-m:] = NaN
return avg
def month_to_year(temp, unc=False):
return running_average(temp, 5, 6, unc) # 5+6+1 = 12 months
def month_to_10years(temp, unc=False):
return running_average(temp, 59, 60, unc) #59+60+1 = 120 months
def month_to_20years(temp, unc=False):
return running_average(temp, 119, 120, unc) #119+120+1 = 240 months
In [3]:
rootgrp = Dataset('/tmp/Complete_TAVG_EqualArea.nc')
rootgrp.variables
Out[3]:
In [4]:
longitude = rootgrp.variables["longitude"][:]
latitude = rootgrp.variables["latitude"][:]
time = rootgrp.variables["time"][:]
month_number = rootgrp.variables["month_number"][:]
land_mask = rootgrp.variables["land_mask"][:]
temperature = rootgrp.variables["temperature"][:]
longitude2 = rootgrp.variables["longitude2"][:]
In [5]:
Tavg = nanmean(temperature, axis=1)
T0 = 8.79
In [6]:
Tk = temperature+273.15+T0
r = 4
Tavg4 = nanmean(Tk**r, axis=1)**(1./r)-273.15
In [7]:
Tavg+T0
Out[7]:
In [8]:
Tavg4
Out[8]:
In [9]:
figure(figsize=(8, 6))
plot(time, Tavg + T0, "k-", label="Tavg")
plot(time, Tavg4, "b-", label="Tavg4")
xlabel("Year")
ylabel("Monthly temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
grid()
legend();
In [10]:
figure(figsize=(8, 6))
plot(time, month_to_year(Tavg)+T0, "k-")
plot(time, month_to_year(Tavg4), "b-")
xlabel("Year")
ylabel("Annual temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
ylim([6.5, 10.5])
grid()
In [11]:
figure(figsize=(8, 6))
plot(time, month_to_10years(Tavg)+T0, "k-")
plot(time, month_to_10years(Tavg4), "b-")
xlabel("Year")
ylabel("Decadal temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
ylim([7.5, 10])
grid()
Let's compare against the original Full_TAVG_complete.txt file:
In [12]:
D = loadtxt("data/Full_TAVG_complete.txt", comments="%")
# Read from file:
T0 = 8.79
# Construct floating point years using years + months
years_int = D[:, 0]
months = D[:, 1]
years = years_int + (months-0.5) / 12
temp_month = D[:, 2]
unc_month = D[:, 3]
Check that the years exactly agree:
In [13]:
max(years-time)
Out[13]:
In [14]:
figure(figsize=(8, 6))
plot(time, temp_month + T0, "b-", label="Full_TAVG_complete.txt")
plot(time, Tavg + T0, "k-", label="our")
xlabel("Year")
ylabel("Monthly temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
grid()
legend();
Let's plot the errors:
In [15]:
figure(figsize=(8, 6))
semilogy(time, abs(temp_month-Tavg), "k-")
xlabel("Year")
ylabel("Monthly temperature differences [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
grid();
As can be seen, we are able to get similar temperatures, typically 1C off around the year 1800, and 0.1C off from 1850 and later.